library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the bike-sharing CSV from the local Downloads folder into a tibble;
# column types are guessed by readr.
bike_sharing <- read_csv(file = "~/Downloads/bikesharing.csv")
## Rows: 731 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): season, month, weekday, weather
## dbl (7): year, temperature_F, casual, registered, count, humidity, windspeed
## lgl (2): holiday, workingday
## date (2): date, date_noyear
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column summary of the data set as a first sanity check.
bike_sharing %>%
  summary()
## season month year date
## Length:731 Length:731 Min. :2011 Min. :2011-01-01
## Class :character Class :character 1st Qu.:2011 1st Qu.:2011-07-02
## Mode :character Mode :character Median :2012 Median :2012-01-01
## Mean :2012 Mean :2012-01-01
## 3rd Qu.:2012 3rd Qu.:2012-07-01
## Max. :2012 Max. :2012-12-31
## date_noyear holiday weekday workingday
## Min. :2000-01-01 Mode :logical Length:731 Mode :logical
## 1st Qu.:2000-04-01 FALSE:710 Class :character FALSE:231
## Median :2000-07-02 TRUE :21 Mode :character TRUE :500
## Mean :2000-07-01
## 3rd Qu.:2000-10-01
## Max. :2000-12-31
## weather temperature_F casual registered
## Length:731 Min. :36.40 Min. : 2.0 Min. : 20
## Class :character 1st Qu.:56.85 1st Qu.: 315.5 1st Qu.:2497
## Mode :character Median :68.80 Median : 713.0 Median :3662
## Mean :68.56 Mean : 848.2 Mean :3656
## 3rd Qu.:80.35 3rd Qu.:1096.0 3rd Qu.:4776
## Max. :95.60 Max. :3410.0 Max. :6946
## count humidity windspeed
## Min. : 22 Min. : 0.00 Min. : 1.50
## 1st Qu.:3152 1st Qu.:52.00 1st Qu.: 9.00
## Median :4548 Median :62.70 Median :12.10
## Mean :4504 Mean :62.79 Mean :12.76
## 3rd Qu.:5956 3rd Qu.:73.00 3rd Qu.:15.60
## Max. :8714 Max. :97.30 Max. :34.00
# Number of rows in each weather category, all seasons pooled.
ggplot(data = bike_sharing, mapping = aes(x = weather)) +
  geom_bar(fill = "dodgerblue")

# The same bar chart restricted to rows whose season is "summer".
ggplot(
  data = filter(bike_sharing, season == "summer"),
  mapping = aes(x = weather)
) +
  geom_bar(fill = "dodgerblue")
# Scatter plots of `count` against each numeric condition column in turn.

# count vs. humidity
ggplot(data = bike_sharing, mapping = aes(x = humidity, y = count)) +
  geom_point(color = "dodgerblue")

# count vs. wind speed
ggplot(data = bike_sharing, mapping = aes(x = windspeed, y = count)) +
  geom_point(color = "dodgerblue")

# count vs. temperature
ggplot(data = bike_sharing, mapping = aes(x = temperature_F, y = count)) +
  geom_point(color = "dodgerblue")
# Temperature vs. count, adding categorical encodings one step at a time.

# Colour by season only.
ggplot(
  data = bike_sharing,
  mapping = aes(x = temperature_F, y = count, color = season)
) +
  geom_point()

# Colour by season, point shape by weather.
ggplot(
  data = bike_sharing,
  mapping = aes(x = temperature_F, y = count, color = season, shape = weather)
) +
  geom_point()

# Encodings swapped: colour by weather, point shape by season.
ggplot(
  data = bike_sharing,
  mapping = aes(x = temperature_F, y = count, color = weather, shape = season)
) +
  geom_point()
# Temperature vs. count with the casual share of riders (casual / count)
# layered on top.

# Casual share as point size, alongside season (colour) and weather (shape).
bike_sharing %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(
    mapping = aes(
      x = temperature_F,
      y = count,
      color = season,
      shape = weather,
      size = frac_casual
    )
  ) +
  geom_point()

# Casual share as the only extra encoding, shown as point colour.
bike_sharing %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(mapping = aes(x = temperature_F, y = count, color = frac_casual)) +
  geom_point()
# Temperature vs. count scatter (colour = weather, shape = season) with a
# title and readable axis labels.
ggplot(
  data = bike_sharing,
  mapping = aes(x = temperature_F, y = count, color = weather, shape = season)
) +
  geom_point() +
  labs(
    title = "Effect of weather conditions on number of riders",
    x = "temperature (F)",
    y = "number of riders"
  )
# The six "summer" rows with the smallest `count`, in ascending order.
bike_sharing %>%
  filter(season == "summer") %>%
  arrange(count) %>%
  head(n = 6)
## # A tibble: 6 × 15
## season month year date date_noyear holiday weekday worki…¹ weather
## <chr> <chr> <dbl> <date> <date> <lgl> <chr> <lgl> <chr>
## 1 summer August 2011 2011-08-27 2000-08-27 FALSE Saturday FALSE cloudy
## 2 summer September 2011 2011-09-08 2000-09-08 FALSE Thursday TRUE rain
## 3 summer September 2011 2011-09-07 2000-09-07 FALSE Wednesd… TRUE rain
## 4 summer September 2011 2011-09-06 2000-09-06 FALSE Tuesday TRUE rain
## 5 summer July 2011 2011-07-23 2000-07-23 FALSE Saturday FALSE clear
## 6 summer September 2011 2011-09-05 2000-09-05 TRUE Monday FALSE cloudy
## # … with 6 more variables: temperature_F <dbl>, casual <dbl>, registered <dbl>,
## # count <dbl>, humidity <dbl>, windspeed <dbl>, and abbreviated variable name
## # ¹​workingday
# The six "fall" rows with the smallest `count`, in ascending order.
bike_sharing %>%
  filter(season == "fall") %>%
  arrange(count) %>%
  head(n = 6)
## # A tibble: 6 × 15
## season month year date date_noyear holiday weekday worki…¹ weather
## <chr> <chr> <dbl> <date> <date> <lgl> <chr> <lgl> <chr>
## 1 fall October 2012 2012-10-29 2000-10-29 FALSE Monday TRUE rain
## 2 fall October 2011 2011-10-29 2000-10-29 FALSE Saturday FALSE rain
## 3 fall December 2011 2011-12-07 2000-12-07 FALSE Wednesday TRUE rain
## 4 fall October 2012 2012-10-30 2000-10-30 FALSE Tuesday TRUE cloudy
## 5 fall November 2011 2011-11-24 2000-11-24 TRUE Thursday FALSE clear
## 6 fall November 2011 2011-11-22 2000-11-22 FALSE Tuesday TRUE rain
## # … with 6 more variables: temperature_F <dbl>, casual <dbl>, registered <dbl>,
## # count <dbl>, humidity <dbl>, windspeed <dbl>, and abbreviated variable name
## # ¹​workingday
# What affects the number of casual riders on a given day?
# Casual share of riders (casual / count) plotted against date, all years.
ggplot(
  data = mutate(bike_sharing, frac_casual = casual / count),
  mapping = aes(x = date, y = frac_casual)
) +
  geom_point(color = "dodgerblue")
# Rows from 2011 only, with the casual share of riders (casual / count) added
# once, so the plots below do not each repeat the same filter/mutate steps.
bike_2011 <- bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count)

# Casual share by date, no extra encoding.
bike_2011 %>%
  ggplot(aes(date, frac_casual)) +
  geom_point(color = "dodgerblue")

# Coloured by season.
bike_2011 %>%
  ggplot(aes(date, frac_casual, color = season)) +
  geom_point()

# Coloured by temperature.
bike_2011 %>%
  ggplot(aes(date, frac_casual, color = temperature_F)) +
  geom_point()

# Coloured by humidity.
bike_2011 %>%
  ggplot(aes(date, frac_casual, color = humidity)) +
  geom_point()

# Coloured by the logical `workingday` flag.
bike_2011 %>%
  ggplot(aes(date, frac_casual, color = workingday)) +
  geom_point()
# Casual share by date in 2011, coloured by `workingday`, with holidays marked
# by point shape. `holiday` is a logical column, and mapping it to size makes
# ggplot2 warn "Using size for a discrete variable is not advised"; shape is
# an aesthetic meant for discrete values, so it is used instead.
bike_sharing %>%
  filter(year == 2011) %>%
  mutate(frac_casual = casual / count) %>%
  ggplot(aes(date, frac_casual, color = workingday, shape = holiday)) +
  geom_point()
# Casual share by date in 2011, coloured by `workingday`, with a title and a
# readable y-axis label.
ggplot(
  data = mutate(filter(bike_sharing, year == 2011), frac_casual = casual / count),
  mapping = aes(x = date, y = frac_casual, color = workingday)
) +
  geom_point() +
  labs(
    title = "Fraction of casual riders by date in 2011",
    y = "fraction of casual riders"
  )
# Working days in 2011 ranked from the largest casual share to the smallest,
# keeping only the share and the date.
bike_sharing %>%
  filter(year == 2011, workingday) %>%
  mutate(frac_casual = casual / count) %>%
  arrange(desc(frac_casual)) %>%
  select(frac_casual, date)
## # A tibble: 250 × 2
## frac_casual date
## <dbl> <date>
## 1 0.392 2011-11-25
## 2 0.273 2011-03-18
## 3 0.255 2011-04-11
## 4 0.238 2011-07-15
## 5 0.236 2011-04-04
## 6 0.232 2011-07-01
## 7 0.221 2011-07-05
## 8 0.217 2011-08-23
## 9 0.214 2011-08-12
## 10 0.210 2011-08-05
## # … with 240 more rows
# Row counts per weather category, with season added step by step.

# Plain counts per weather category.
ggplot(data = bike_sharing, mapping = aes(x = weather)) +
  geom_bar(fill = "dodgerblue")

# Bars stacked by season.
ggplot(data = bike_sharing, mapping = aes(x = weather, fill = season)) +
  geom_bar()

# One panel per season.
ggplot(data = bike_sharing, mapping = aes(x = weather, fill = season)) +
  geom_bar() +
  facet_wrap(facets = vars(season))
# Row counts per weather category with one side-by-side bar per season.
# The title is capitalised to match the other plot titles in this script.
bike_sharing %>%
  ggplot(aes(weather, fill = season)) +
  geom_bar(position = "dodge") +
  labs(title = "Weather by season")
# Row counts per season with one side-by-side bar per weather category.
ggplot(data = bike_sharing, mapping = aes(x = season, fill = weather)) +
  geom_bar(position = "dodge") +
  labs(title = "Season by weather")